
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  <head>
    <!-- Document metadata, theme stylesheets and the scripts the Sphinx UI depends on. -->
    <meta http-equiv="X-UA-Compatible" content="IE=Edge" />
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>pandas_utilities &#8212; deepaugment 0.2.0 documentation</title>
    <link rel="stylesheet" href="../_static/alabaster.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <!-- Loaded synchronously on purpose: an inline script in the sidebar calls $ while the page is parsed. -->
    <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="stylesheet" href="../_static/custom.css" type="text/css" />
    <!-- No maximum-scale here: capping the zoom level stops readers from pinch-zooming the page. -->
    <meta name="viewport" content="width=device-width, initial-scale=0.9" />

  </head><body>
  

    <div class="document">
      <div class="documentwrapper">
        <div class="bodywrapper">
          

          <div class="body" role="main">
            
  <h1>Source code for pandas_utilities</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>


<!-- Highlighted listing of pandas_utilities.df_correlations. NOTE(review): the listing calls pearsonr and spearmanr, but the only import shown on this page is "import pandas as pd"; confirm the module imports them (presumably from scipy.stats), fix the Python source if not, and regenerate this page. --><div class="viewcode-block" id="df_correlations"><a class="viewcode-back" href="../lib.pandas_utilities.html#pandas_utilities.df_correlations">[docs]</a><span class="k">def</span> <span class="nf">df_correlations</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">features</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot; Calculates pearson and spearman correlations of each tuple of given features</span>
<span class="sd">        </span>
<span class="sd">    Args:</span>
<span class="sd">        df (pandas.DataFrame)</span>
<span class="sd">        features (list): features to calculate their correlations</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">rowList</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">features</span><span class="p">)):</span>
        <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">features</span><span class="p">)):</span>
            <span class="n">feat1</span> <span class="o">=</span> <span class="n">features</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
            <span class="n">feat2</span> <span class="o">=</span> <span class="n">features</span><span class="p">[</span><span class="n">j</span><span class="p">]</span>
            <span class="n">tmp_df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[(</span><span class="n">df</span><span class="p">[</span><span class="n">feat1</span><span class="p">]</span><span class="o">.</span><span class="n">notnull</span><span class="p">())</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">feat2</span><span class="p">]</span><span class="o">.</span><span class="n">notnull</span><span class="p">())]</span>
            <span class="n">p_r</span><span class="p">,</span> <span class="n">p_p</span> <span class="o">=</span> <span class="n">pearsonr</span><span class="p">(</span><span class="n">tmp_df</span><span class="p">[</span><span class="n">feat1</span><span class="p">],</span> <span class="n">tmp_df</span><span class="p">[</span><span class="n">feat2</span><span class="p">])</span>
            <span class="n">s_r</span><span class="p">,</span> <span class="n">s_p</span> <span class="o">=</span> <span class="n">spearmanr</span><span class="p">(</span><span class="n">tmp_df</span><span class="p">[</span><span class="n">feat1</span><span class="p">],</span> <span class="n">tmp_df</span><span class="p">[</span><span class="n">feat2</span><span class="p">])</span>
            <span class="n">rowList</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">feat1</span><span class="p">,</span> <span class="n">feat2</span><span class="p">,</span> <span class="n">p_r</span><span class="p">,</span> <span class="n">p_p</span><span class="p">,</span> <span class="n">s_r</span><span class="p">,</span> <span class="n">s_p</span><span class="p">])</span>

    <span class="k">return</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
        <span class="n">rowList</span><span class="p">,</span>
        <span class="n">columns</span><span class="o">=</span><span class="p">[</span>
            <span class="s2">&quot;column1&quot;</span><span class="p">,</span>
            <span class="s2">&quot;column2&quot;</span><span class="p">,</span>
            <span class="s2">&quot;pearson_r&quot;</span><span class="p">,</span>
            <span class="s2">&quot;pearson_pvalue&quot;</span><span class="p">,</span>
            <span class="s2">&quot;spearman_r&quot;</span><span class="p">,</span>
            <span class="s2">&quot;spearman_pvalue&quot;</span><span class="p">,</span>
        <span class="p">],</span>
    <span class="p">)</span></div>


<!-- Highlighted listing of pandas_utilities.summary: the function builds one row per dataframe column (fill rate, unique count, uniques/filled ratio, dtype, mean, std) and returns the rows as a DataFrame. Generated markup; change the Python source and regenerate rather than editing here. --><div class="viewcode-block" id="summary"><a class="viewcode-back" href="../lib.pandas_utilities.html#pandas_utilities.summary">[docs]</a><span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="n">df</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;Summarizes the given dataframe</span>
<span class="sd">        </span>
<span class="sd">    Args:</span>
<span class="sd">        df (pandas.DataFrame): dataframe to be summarized</span>
<span class="sd">    &quot;&quot;&quot;</span>

    <span class="n">rowList</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
        <span class="n">rowList</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
            <span class="p">[</span>
                <span class="n">col</span><span class="p">,</span>
                <span class="s2">&quot;</span><span class="si">{:.0f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">notnull</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">*</span> <span class="mi">100</span><span class="p">),</span>
                <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">nunique</span><span class="p">(),</span>
                <span class="s2">&quot;</span><span class="si">{:.0f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">nunique</span><span class="p">()</span> <span class="o">/</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">notnull</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">*</span> <span class="mi">100</span><span class="p">),</span>
                <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">dtypes</span><span class="p">,</span>
                <span class="s2">&quot;N/A&quot;</span>
                <span class="k">if</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">dtypes</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;float&quot;</span><span class="p">,</span> <span class="s2">&quot;int&quot;</span><span class="p">]</span>
                <span class="k">else</span> <span class="s2">&quot;</span><span class="si">{:.2f}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()),</span>
                <span class="s2">&quot;N/A&quot;</span>
                <span class="k">if</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">dtypes</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;float&quot;</span><span class="p">,</span> <span class="s2">&quot;int&quot;</span><span class="p">]</span>
                <span class="k">else</span> <span class="s2">&quot;</span><span class="si">{:.2f}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">std</span><span class="p">()),</span>
            <span class="p">]</span>
        <span class="p">)</span>
    <span class="n">stats</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
        <span class="n">rowList</span><span class="p">,</span>
        <span class="n">columns</span><span class="o">=</span><span class="p">[</span>
            <span class="s2">&quot;column&quot;</span><span class="p">,</span>
            <span class="s2">&quot;filled&quot;</span><span class="p">,</span>
            <span class="s2">&quot;n_unique&quot;</span><span class="p">,</span>
            <span class="s2">&quot;uniques/filled&quot;</span><span class="p">,</span>
            <span class="s2">&quot;dtype&quot;</span><span class="p">,</span>
            <span class="s2">&quot;mean&quot;</span><span class="p">,</span>
            <span class="s2">&quot;std&quot;</span><span class="p">,</span>
        <span class="p">],</span>
    <span class="p">)</span>

    <span class="k">return</span> <span class="n">stats</span></div>


<!-- Highlighted listing of pandas_utilities.rank_in_group. NOTE(review): the listing indexes df_slice with "rank_column", which is neither a parameter nor a local of this function (the parameter is "rank_col"); fix pandas_utilities.py and regenerate this page rather than editing the markup. --><div class="viewcode-block" id="rank_in_group"><a class="viewcode-back" href="../lib.pandas_utilities.html#pandas_utilities.rank_in_group">[docs]</a><span class="k">def</span> <span class="nf">rank_in_group</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">group_col</span><span class="p">,</span> <span class="n">rank_col</span><span class="p">,</span> <span class="n">rank_method</span><span class="o">=</span><span class="s2">&quot;first&quot;</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;Ranks a column in each group which is grouped by another column</span>
<span class="sd">        </span>
<span class="sd">    Args:</span>
<span class="sd">        df (pandas.DataFrame): dataframe to rank-in-group its column</span>
<span class="sd">        group_col (str): column to be grouped by</span>
<span class="sd">        rank_col (str): column to be ranked for</span>
<span class="sd">        rank_method (str): rank method to be the &quot;method&quot; argument of pandas.rank() function</span>
<span class="sd">        </span>
<span class="sd">    Returns:</span>
<span class="sd">        pandas.DataFrame: dataframe after the rank-in-group operation</span>
<span class="sd">    &quot;&quot;&quot;</span>

    <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
    <span class="n">df_slice</span> <span class="o">=</span> <span class="n">df</span><span class="p">[[</span><span class="n">group_col</span><span class="p">,</span> <span class="n">rank_col</span><span class="p">]]</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">()</span>
    <span class="n">df_slice</span><span class="p">[</span><span class="s2">&quot;ranked_</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">rank_col</span><span class="p">)]</span> <span class="o">=</span> <span class="n">df_slice</span><span class="p">[</span><span class="n">rank_column</span><span class="p">]</span><span class="o">.</span><span class="n">rank</span><span class="p">(</span>
        <span class="n">method</span><span class="o">=</span><span class="n">rank_method</span>
    <span class="p">)</span>
    <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span>
        <span class="n">df</span><span class="p">,</span>
        <span class="n">df_slice</span><span class="p">[[</span><span class="n">group_col</span><span class="p">,</span> <span class="s2">&quot;ranked_</span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">rank_col</span><span class="p">)]],</span>
        <span class="n">how</span><span class="o">=</span><span class="s2">&quot;left&quot;</span><span class="p">,</span>
        <span class="n">on</span><span class="o">=</span><span class="n">group_col</span><span class="p">,</span>
    <span class="p">)</span>
    <span class="k">return</span> <span class="n">df</span></div>
</pre></div>

          </div>
          
        </div>
      </div>
      <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
        <div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="../index.html">deepaugment</a></h1>








<h3>Navigation</h3>

<!-- Links up the documentation tree: overview page, then the module-code index. -->
<div class="relations">
<h3>Related Topics</h3>
<ul>
  <li><a href="../index.html">Documentation overview</a><ul>
  <li><a href="index.html">Module code</a></li>
  </ul></li>
</ul>
</div>
<!-- Quick-search form. Hidden by default and revealed by the inline script below, so it only appears when scripts run. -->
<div id="searchbox" style="display: none" role="search">
  <!-- The heading doubles as the accessible name of the text field via aria-labelledby. -->
  <h3 id="searchlabel">Quick search</h3>
    <div class="searchformwrapper">
    <form class="search" action="../search.html" method="get">
      <input type="text" name="q" aria-labelledby="searchlabel" />
      <input type="submit" value="Go" />
      <input type="hidden" name="check_keywords" value="yes" />
      <input type="hidden" name="area" value="default" />
    </form>
    </div>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>








        </div>
      </div>
      <div class="clearer"></div>
    </div>
    <!-- Copyright notice and generator credits; external links use https. -->
    <div class="footer">
      &copy;2019, Baris Ozmen.
      
      |
      Powered by <a href="https://www.sphinx-doc.org/">Sphinx 1.8.3</a>
      &amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.12</a>
      
    </div>

    

    
  </body>
</html>